import urllib.request
import lxml
import lxml.etree
import re


def makeurllist(url, pages=480):
    """Build the list of article-list page URLs for jb51.net category 97.

    The original implementation also fetched and XPath-parsed ``url``, but
    the parsed results (``mytree``/``mylist``) were never used, so that dead
    network request has been removed.

    Args:
        url: Kept for backward compatibility with existing callers; it is
            no longer fetched.
        pages: Number of list pages to generate URLs for (default 480).

    Returns:
        list[str]: URLs of the form ``.../list/list_97_<i>.htm`` for
        ``i`` in 1..pages.
    """
    # range(1, pages + 1) fixes an off-by-one: the original range(1, 480)
    # produced only 479 links although the comment promised 480 pages.
    return ["http://www.jb51.net/list/list_97_" + str(i) + ".htm"
            for i in range(1, pages + 1)]

def gettitilefromurl(url):
    """Fetch one jb51.net article-list page and extract links and titles.

    Args:
        url: URL of a jb51.net article-list page (e.g. list_97_5.htm).

    Returns:
        tuple[list[str], list[str]]: ``(hrefs, titles)`` taken from the
        anchors inside the ``artlist clearfix`` container. The original
        version only printed these lists and returned ``None``, so callers
        could not use the scraped data; the ``print`` is kept for
        backward-compatible console output.
    """
    # Spoof a browser User-Agent so the site serves the normal page.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0);"}
    request = urllib.request.Request(url, headers=headers)
    # Context manager closes the HTTP response (the original leaked it).
    with urllib.request.urlopen(request) as response:
        data = response.read()  # raw bytes; lxml sniffs the encoding itself
    mytree = lxml.etree.HTML(data)
    urllist = mytree.xpath("//*[@class=\"artlist clearfix\"]/dl//dt/a/@href")
    titlelist = mytree.xpath("//*[@class=\"artlist clearfix\"]/dl//dt/a/@title")
    print(urllist, titlelist)
    return urllist, titlelist


# Guard the scraper invocation so importing this module has no side effects.
if __name__ == "__main__":
    # Example: print(makeurllist("https://www.jb51.net/list/list_97_1.htm"))
    gettitilefromurl("https://www.jb51.net/list/list_97_5.htm")
